package LayerD.CodeDOM;

import java.util.*;
import java.io.IOException;

/**
 * Skeletal implementation of {@link XplTokenizer}.
 * <p>
 * This class implements the tokenizing logic on top of a character buffer
 * and leaves the management of that buffer to concrete subclasses, which
 * must implement {@link #createBuffer} and {@link #fillBuffer}.
 * By default, instances of this class can handle tokens of up to 16*1024
 * characters in length; see {@link #maximumTokenLength(int)}.
 * <p>
 * Violations of the internal buffer invariants are reported by throwing an
 * {@link IOException}; they never terminate the JVM.
 *
 * @author David Flanagan
 */
public abstract class XplAbstractTokenizer implements XplTokenizer {
    // ---- Configuration, set through the fluent setters below ----
    boolean skipSpaces;
    boolean tokenizeSpaces;
    boolean tokenizeNumbers;
    boolean tokenizeWords;
    boolean testquotes;      // true when at least one quote pair is configured
    XplTokenizer.WordRecognizer wordRecognizer;
    Map<String, Integer> keywordMap;   // keyword text -> index in the keywords array
    String openquotes, closequotes;    // parallel strings: closequotes[i] closes openquotes[i]
    boolean trackPosition;

    int maximumTokenLength = 16 * 1024;

    // ---- Description of the current token ----
    int tokenType = BOF;
    int tokenLine = 0;
    int tokenColumn = 0;
    int tokenKeyword = -1;

    int line = 0, column = 0;  // The line and column numbers of text[p]

    // The name of this field is a little misleading. If eof is true, it
    // means that no more characters are available. But tokenType and tokenText
    // may still be valid until the next call to next(), nextChar(), or scan().
    boolean eof;           // Set to the negated return value of fillBuffer()

    // The following fields keep track of the tokenizer's state
    // Invariant:  tokenStart <= tokenEnd <= p <= numChars <= text.length

    /**
     * The start of the current token in {@link #text}.
     * Subclasses may need to update this field in {@link #fillBuffer}.
     */
    protected int tokenStart = 0;

    /**
     * The index in {@link #text} of the first character after the current
     * token. Subclasses may need to update this field in {@link #fillBuffer}.
     */
    protected int tokenEnd = 0;

    /**
     * The position of the first untokenized character in {@link #text}.
     * Subclasses may need to update this field in {@link #fillBuffer}.
     */
    protected int p = 0;

    /**
     * The number of valid characters of input text stored in {@link #text}.
     * Subclasses must implement {@link #createBuffer} and {@link #fillBuffer}
     * to set this value appropriately.
     */
    protected int numChars = 0;

    /**
     * A buffer holding the text we're parsing.  Subclasses must implement
     * {@link #createBuffer} to set this field to a character array, and
     * {@link #fillBuffer} to refill the array.
     */
    protected char[] text = null;

    /**
     * Create the {@link #text} buffer to use for parsing.  This method may
     * put text in the buffer, but it is not required to.  In either case, it
     * should set {@link #numChars} appropriately.  This method will be called
     * once, before tokenizing begins.
     *
     * @param bufferSize the minimum size of the created array, unless the
     * subclass knows in advance that the input text is smaller than this, in
     * which case, the input text size may be used instead.
     * @see #fillBuffer
     */
    protected abstract void createBuffer(int bufferSize);

    /**
     * Fill or refill the {@link #text} buffer and adjust related fields.
     * This method will be called when the tokenizer needs more characters to
     * tokenize. Concrete subclasses must implement this method to put
     * characters into the {@link #text} buffer, blocking if necessary to wait
     * for characters to become available.  This method may make room in the
     * buffer by shifting the contents down to remove any characters before
     * tokenStart.  It must preserve any characters after {@link #tokenStart}
     * and before {@link #numChars}, however.  After such a shift, it must
     * adjust {@link #tokenStart}, {@link #tokenEnd} and {@link #p}
     * appropriately.  After the optional shift, the method should add as many
     * new characters as possible to {@link #text} (and always at least 1) and
     * adjust {@link #numChars} appropriately.
     *
     * @return false if no more characters are available; true otherwise.
     * @throws IOException if reading the underlying input fails
     * @see #createBuffer
     */
    protected abstract boolean fillBuffer() throws IOException;

    /**
     * Sets whether runs of whitespace are silently discarded by {@link #next}.
     *
     * @param skip true to discard whitespace
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer skipSpaces(boolean skip) {
        skipSpaces = skip;
        return this;
    }

    /**
     * Sets whether runs of whitespace are reported as SPACE tokens. When
     * {@link #skipSpaces(boolean)} is also enabled, skipping takes precedence.
     *
     * @param tokenize true to report whitespace tokens
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer tokenizeSpaces(boolean tokenize) {
        tokenizeSpaces = tokenize;
        return this;
    }

    /**
     * Sets whether runs of digits are reported as NUMBER tokens.
     *
     * @param tokenize true to report number tokens
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer tokenizeNumbers(boolean tokenize) {
        tokenizeNumbers = tokenize;
        return this;
    }

    /**
     * Sets whether words are reported as WORD (or KEYWORD) tokens.
     *
     * @param tokenize true to report word tokens
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer tokenizeWords(boolean tokenize) {
        tokenizeWords = tokenize;
        return this;
    }

    /**
     * Sets the object that decides which characters start and continue a
     * word. When it is null, the Java identifier rules of
     * {@link Character#isJavaIdentifierStart(char)} and
     * {@link Character#isJavaIdentifierPart(char)} are used.
     *
     * @param wordRecognizer the recognizer to use, or null for the default
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer wordRecognizer(XplTokenizer.WordRecognizer wordRecognizer) {
        this.wordRecognizer = wordRecognizer;
        return this;
    }

    /**
     * Configures the quote characters. The character at index i of
     * {@code closequotes} terminates text opened by the character at index i
     * of {@code openquotes}. Passing two empty strings disables quoting.
     *
     * @param openquotes the opening quote characters
     * @param closequotes the matching closing quote characters
     * @return this tokenizer, for call chaining
     * @throws NullPointerException if either argument is null
     * @throws IllegalArgumentException if the arguments differ in length
     */
    public XplTokenizer quotes(String openquotes, String closequotes) {
        if (openquotes == null || closequotes == null) {
            throw new NullPointerException("arguments must be non-null");
        }
        if (openquotes.length() != closequotes.length()) {
            throw new IllegalArgumentException("argument lengths differ");
        }
        this.openquotes = openquotes;
        this.closequotes = closequotes;
        this.testquotes = openquotes.length() > 0;
        return this;
    }

    /**
     * Sets whether line and column numbers are tracked. This may only be
     * changed before the buffer has been created.
     *
     * @param track true to track token positions
     * @return this tokenizer, for call chaining
     * @throws IllegalStateException if the text buffer already exists
     */
    public XplTokenizer trackPosition(boolean track) {
        if (text != null) {
            throw new IllegalStateException(
                "position tracking cannot be changed once the buffer exists");
        }
        trackPosition = track;
        return this;
    }

    /**
     * Sets the list of keywords. A word token whose text equals
     * {@code keywords[i]} is reported as KEYWORD and {@link #tokenKeyword()}
     * then returns i. If the array contains duplicates, the last index wins.
     *
     * @param keywords the keywords, or null to disable keyword recognition
     * @return this tokenizer, for call chaining
     */
    public XplTokenizer keywords(String[] keywords) {
        if (keywords == null) {
            keywordMap = null;
        } else {
            Map<String, Integer> map = new HashMap<String, Integer>(keywords.length);
            for (int i = 0; i < keywords.length; i++) {
                map.put(keywords[i], Integer.valueOf(i));
            }
            keywordMap = map;
        }
        return this;
    }

    /**
     * Sets the buffer size requested from {@link #createBuffer}. This may
     * only be changed before the buffer has been created.
     *
     * @param size the requested size; must be at least 1
     * @return this tokenizer, for call chaining
     * @throws IllegalArgumentException if size is less than 1
     * @throws IllegalStateException if the text buffer already exists
     */
    public XplTokenizer maximumTokenLength(int size) {
        if (size < 1) {
            throw new IllegalArgumentException(
                "maximum token length must be at least 1: " + size);
        }
        if (text != null) {
            throw new IllegalStateException(
                "maximum token length cannot be changed once the buffer exists");
        }
        maximumTokenLength = size;
        return this;
    }

    /** @return the type of the current token */
    public int tokenType() { return tokenType; }

    /**
     * @return the text of the current token, or null if the buffer has not
     * been created yet or the token start lies at or beyond the valid text
     */
    public String tokenText() {
        if (text == null || tokenStart >= numChars) return null;
        return new String(text, tokenStart, tokenEnd - tokenStart);
    }

    /**
     * @return the line recorded at the start of the current token, or 0 when
     * positions are not tracked or the token start lies beyond the valid text
     */
    public int tokenLine() {
        return (trackPosition && tokenStart < numChars) ? tokenLine : 0;
    }

    /**
     * @return the column recorded at the start of the current token, or 0 when
     * positions are not tracked or the token start lies beyond the valid text
     */
    public int tokenColumn() {
        return (trackPosition && tokenStart < numChars) ? tokenColumn : 0;
    }

    /**
     * @return the keyword index of the current token, or -1 if the current
     * token is not a KEYWORD
     */
    public int tokenKeyword() {
        return (tokenType == KEYWORD) ? tokenKeyword : -1;
    }

    /**
     * Reads the next token according to the configured options.
     * Depending on the configuration the token is a run of whitespace, a run
     * of digits, a word (possibly a keyword), a quoted text, or otherwise a
     * single character whose code is used as the token type.
     *
     * @return the type of the token that was read, or EOF at end of input
     * @throws IOException if the buffer cannot be filled, or if the internal
     * buffer invariants are violated
     */
    public int next() throws IOException {
        beginNewToken();
        if (eof) return tokenType = EOF;

        char c = text[p];
        int quoteindex;

        if ((skipSpaces || tokenizeSpaces) && Character.isWhitespace(c)) {
            tokenType = SPACE;
            do {
                if (trackPosition) updatePosition(text[p]);
                p++;
                if (p >= numChars) eof = !fillBuffer();
            } while (!eof && Character.isWhitespace(text[p]));

            // If we don't return space tokens, recurse to find another token.
            // The next character is not whitespace (or we are at EOF), so the
            // recursion is at most one level deep.
            if (skipSpaces) return next();
            tokenEnd = p;
        }
        else if (tokenizeNumbers && Character.isDigit(c)) {
            tokenType = NUMBER;
            do {
                if (trackPosition) column++;
                p++;
                if (p >= numChars) eof = !fillBuffer();
            } while (!eof && Character.isDigit(text[p]));
            tokenEnd = p;
        }
        else if (tokenizeWords &&
                 (wordRecognizer != null
                      ? wordRecognizer.isWordStart(c)
                      : Character.isJavaIdentifierStart(c))) {
            tokenType = WORD;
            do {
                if (trackPosition) column++;
                p++;
                if (p >= numChars) eof = !fillBuffer();
            } while (!eof &&
                     (wordRecognizer != null
                          ? wordRecognizer.isWordPart(text[p], c)
                          : Character.isJavaIdentifierPart(text[p])));

            if (keywordMap != null) {
                // tokenStart is re-read here because fillBuffer may have
                // shifted the buffer while the word was being scanned.
                String ident = new String(text, tokenStart, p - tokenStart);
                Integer index = keywordMap.get(ident);
                if (index != null) {
                    tokenType = KEYWORD;
                    tokenKeyword = index.intValue();
                }
            }
            tokenEnd = p;
        }
        else if (testquotes && (quoteindex = openquotes.indexOf(c)) != -1) {
            // Notes: we do not recognize any escape characters.
            // We do not include the opening or closing quote.
            // We do not report an error on EOF or OVERFLOW.
            if (trackPosition) column++;
            p++;
            // Scan until we find the matching close quote. The token type is
            // the opening delimiter; scan() sets tokenEnd for us.
            char closequote = closequotes.charAt(quoteindex);
            scan(closequote, false, false, true);
            tokenType = c;
        }
        else {
            // Otherwise, the character itself is the token
            if (trackPosition) updatePosition(text[p]);
            tokenType = text[p];
            p++;
            tokenEnd = p;
        }

        // Report a broken invariant the same way nextChar() and scan() do,
        // instead of terminating the whole JVM.
        checkInvariants("Invariantes erroneos. Error en AbstractTokenizer. INTERNO.");
        return tokenType;
    }

    /**
     * Reads exactly one character as a token, regardless of the configured
     * options. The character code becomes the token type.
     *
     * @return the character that was read, or EOF at end of input
     * @throws IOException if the buffer cannot be filled, or if the internal
     * buffer invariants are violated
     */
    public int nextChar() throws IOException {
        beginNewToken();
        if (eof) return tokenType = EOF;
        tokenType = text[p];
        if (trackPosition) updatePosition(text[p]);
        tokenEnd = ++p;
        checkInvariants("Invariantes erroneos 2.");
        return tokenType;
    }

    /**
     * Scans until the given single delimiter character is found.
     *
     * @see #scan(char[], boolean, boolean, boolean, boolean)
     */
    public int scan(char delimiter, boolean extendCurrentToken,
                    boolean includeDelimiter, boolean skipDelimiter)
        throws IOException
    {
        return scan(new char[] { delimiter }, false,
                    extendCurrentToken, includeDelimiter, skipDelimiter);
    }

    /**
     * Scans until the characters of the given string are found, either as a
     * complete sequence or as a set of alternatives.
     *
     * @see #scan(char[], boolean, boolean, boolean, boolean)
     */
    public int scan(String delimiter, boolean matchall,
                    boolean extendCurrentToken,
                    boolean includeDelimiter, boolean skipDelimiter)
        throws IOException
    {
        return scan(delimiter.toCharArray(), matchall,
                    extendCurrentToken, includeDelimiter, skipDelimiter);
    }

    /**
     * Scans forward until a delimiter is found and makes the scanned text the
     * current token. The token type is set to TEXT even when the return value
     * is EOF or OVERFLOW.
     *
     * @param delimiter the delimiter characters; must not be empty
     * @param matchall if true, the whole delimiter must appear as a
     * contiguous sequence; if false, any single one of its characters
     * terminates the scan
     * @param extendCurrentToken if true, the current token is extended
     * instead of starting a new one
     * @param includeDelimiter if true, the delimiter becomes part of the token
     * @param skipDelimiter if true (and includeDelimiter is false), the
     * delimiter is consumed but left out of the token
     * @return TEXT if a delimiter was found, EOF if the input ended first, or
     * OVERFLOW if the token fills the whole buffer
     * @throws IllegalArgumentException if the delimiter is empty, or if
     * matchall is true and the delimiter is neither included nor skipped
     * @throws IOException if the buffer cannot be filled, or if the internal
     * buffer invariants are violated
     */
    protected int scan(char[] delimiter,
                       boolean matchall, boolean extendCurrentToken,
                       boolean includeDelimiter, boolean skipDelimiter)
        throws IOException
    {
        if (matchall && !includeDelimiter && !skipDelimiter) {
            throw new IllegalArgumentException("must include or skip " +
                                          "delimiter when matchall is true");
        }
        // An empty delimiter used to fail later with an index or null
        // pointer error; reject it explicitly instead.
        if (delimiter.length == 0) {
            throw new IllegalArgumentException("delimiter must not be empty");
        }

        if (extendCurrentToken) ensureChars();
        else beginNewToken();

        tokenType = TEXT; // Even if return value differs
        if (eof) return EOF;

        boolean single = delimiter.length == 1;
        // Sequence matching: number of delimiter characters matched so far,
        // plus the failure table that tells where to resume after a mismatch.
        int matched = 0;
        int[] failure = (matchall && !single) ? buildFailureTable(delimiter) : null;
        // Set matching: the delimiter characters as a searchable string.
        String delimString = (!matchall && !single) ? new String(delimiter) : null;

        while (!eof) {
            char ch = text[p];
            // See if we've found the delimiter.  There are 3 cases here:
            // 1) single-character delimiter
            // 2) multi-char delimiter, and all must be matched sequentially
            // 3) multi-char delimiter, must match any one of them.
            if (single) {
                if (ch == delimiter[0]) break;
            }
            else if (matchall) {
                // On a mismatch fall back to the longest delimiter prefix that
                // is still matched, rather than restarting from scratch; a
                // plain restart would miss "*/" in "**/".
                while (matched > 0 && ch != delimiter[matched]) {
                    matched = failure[matched - 1];
                }
                if (ch == delimiter[matched]) {
                    matched++;
                    if (matched == delimiter.length) break;
                }
            }
            else {
                if (delimString.indexOf(ch) != -1) break;
            }

            if (trackPosition) updatePosition(ch);
            p++;
            if (p >= numChars) {    // Do we need more text?
                // There is room if fillBuffer can discard characters before
                // the token, or if the buffer is not completely full yet.
                if (tokenStart > 0 || numChars < text.length) {
                    eof = !fillBuffer();
                }
                else {                  // No room for more characters
                    tokenEnd = p;       // so report an overflow
                    return OVERFLOW;
                }
            }
        }

        if (eof) {
            tokenEnd = p;
            return EOF;
        }

        // Here text[p] is the (last) delimiter character.
        if (includeDelimiter) {
            if (trackPosition) updatePosition(text[p]);
            p++;
            tokenEnd = p;
        }
        else if (skipDelimiter) {
            if (trackPosition) updatePosition(text[p]);
            p++;
            if (matchall) tokenEnd = p - delimiter.length;
            else tokenEnd = p - 1;
        }
        else {
            // matchall is false here, so exactly one character was matched
            // and it stays in the input for the next token.
            tokenEnd = p;
        }

        checkInvariants("Invariantes erroneos. 3");
        return TEXT;
    }

    /**
     * Builds the Knuth-Morris-Pratt failure table for a pattern: entry i is
     * the length of the longest proper prefix of pattern[0..i] that is also a
     * suffix of it.
     */
    private static int[] buildFailureTable(char[] pattern) {
        int[] failure = new int[pattern.length];
        int k = 0;
        for (int i = 1; i < pattern.length; i++) {
            while (k > 0 && pattern[i] != pattern[k]) k = failure[k - 1];
            if (pattern[i] == pattern[k]) k++;
            failure[i] = k;
        }
        return failure;
    }

    /**
     * Verifies the class invariant
     * {@code 0 <= tokenStart <= tokenEnd <= p <= numChars <= text.length}.
     *
     * @param message the message of the exception thrown on violation
     * @throws IOException if the invariant does not hold
     */
    private void checkInvariants(String message) throws IOException {
        boolean ok = text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
                     tokenEnd <= p && p <= numChars && numChars <= text.length;
        if (!ok) throw new IOException(message);
    }

    /**
     * Creates the buffer on first use and makes sure that, unless the input
     * is exhausted, at least one unread character is available at text[p].
     */
    private void ensureChars() throws IOException {
        if (text == null) {
            createBuffer(maximumTokenLength);  // create text[], set numChars
            p = tokenStart = tokenEnd = 0;     // initialize other state
            if (trackPosition) line = column = 1;
        }
        if (!eof && p >= numChars) { // Fill the text[] buffer if needed
            eof = !fillBuffer();
        }

        // Make sure our class invariants hold true before we start a token.
        // This is stricter than checkInvariants: p may equal numChars only
        // when the input is exhausted.
        boolean ok = text != null && 0 <= tokenStart && tokenStart <= tokenEnd &&
                     tokenEnd <= p && (p < numChars || (p == numChars && eof)) &&
                     numChars <= text.length;
        if (!ok) throw new IOException("Invariantes erroneos. 4");
    }

    /**
     * Prepares for reading a new token: ensures characters are available and,
     * unless at end of input, records the start index and position of the
     * token.
     */
    private void beginNewToken() throws IOException {
        ensureChars();
        if (!eof) {
            tokenStart = p;
            tokenColumn = column;
            tokenLine = line;
        }
    }

    /** Advances the line/column counters past the character c. */
    private void updatePosition(char c) {
        if (c == '\n') {
            line++;
            column = 1;
        }
        else column++;
    }
}